
# Quantification using RSEM
# Author: Nanda Yellapu, Dept of Biostatistics & Data Science, University of Kansas Medical Center, KS, USA
# Collaborator: Kalyani Pyaram


# Directory holding the concatenated paired-end FASTQ files. It is pasted
# directly in front of the sample names below, so it must end with "/".
path <- "/panfs/pfs.local/work/biostat/nyellapu/Kalyani/Sep_2023/Raw_data/concatenated_files/"
# Sample names, one per line of samples.txt (read from the working directory).
samples <- scan("samples.txt", what = "", sep = "\n")
# Number of threads handed to RSEM through its -p option.
thread.num <- 8

# Process at most the first 18 samples. head() is used instead of
# samples[1:18] because indexing past the end of a shorter vector yields NA
# entries, which would then be run as a sample literally named "NA".
for (sample in head(samples, 18)) {
  # Each sample gets its own output directory under the working directory.
  # An already existing directory is not an error (e.g. when re-running).
  dir.create(sample, showWarnings = FALSE)

  # Build the RSEM command: bowtie2 alignment of the paired-end reads
  # <path><sample>_R1.fastq / <path><sample>_R2.fastq against the mm10 RSEM
  # reference, with outputs written using the prefix <sample>/<sample>.
  rsem <- paste0(
    "rsem-calculate-expression -p ", thread.num,
    " --bowtie2 --paired-end --output-genome-bam ",
    path, sample, "_R1.fastq ",
    path, sample, "_R2.fastq /panfs/pfs.local/work/biostat/d324p169/reference/fridleyRef/ucsc-mm10-rsem/mm10-rsem ",
    sample, "/", sample
  )

  # Run the command; report a failure but keep going with the remaining
  # samples, as the original loop did.
  status <- system(rsem)
  if (!isTRUE(status == 0)) {
    warning("rsem-calculate-expression exited with status ", status,
            " for sample '", sample, "'", call. = FALSE)
  }
}

# Generate the count matrix

# Combine the per-sample RSEM gene results into a single matrix file by
# running the external `rsem-generate-data-matrix` command.
#
# Arguments:
#   path    - directory containing one sub-directory per sample; a trailing
#             "/" is optional. An empty string means the working directory.
#   samples - character vector of sample names. For each sample the file
#             <path>/<sample>/<sample>.genes.results is passed to the command.
#
# The command's standard output is redirected to <path>/genes.count.matrix.
#
# Returns (invisibly) the exit status reported by system(); a warning is
# raised when that status is not 0. Stops with an error if `samples` is empty.
summary.ret <- function(path, samples) {
  if (length(samples) == 0L) {
    stop("'samples' must contain at least one sample name", call. = FALSE)
  }

  # Normalise the directory prefix once so that input and output paths are
  # built the same way whether or not `path` ends with "/". (Previously the
  # inputs were built as <path><sample>, which is only correct when `path`
  # already ends with a slash.)
  prefix <- if (nzchar(path) && !grepl("/$", path)) paste0(path, "/") else path

  # One <prefix><sample>/<sample>.genes.results argument per sample.
  gene_files <- paste0(prefix, samples, "/", samples, ".genes.results")

  combine.gene <- paste0(
    "rsem-generate-data-matrix ",
    paste(gene_files, collapse = " "),
    " > ", prefix, "genes.count.matrix"
  )

  status <- system(combine.gene)
  if (!isTRUE(status == 0)) {
    warning("rsem-generate-data-matrix exited with status ", status,
            call. = FALSE)
  }
  invisible(status)
}
